library(tidyverse)
library(knitr)
library(janitor)
library("readxl")
library(ggfortify)
library(GGally)
library(qtlcharts)
library(leaps)
library(sjPlot)
library(pheatmap)
The data set is adapted from th
data = read.delim("bodyfat.txt") %>% janitor::clean_names()
data = data %>%
mutate(bmi = (data$weight/(data$height ^ 2)) * 703,
overweight = case_when(
bmi >= 25 ~ "yes",
bmi < 25 ~ "no"))
#colnames(data)
data_bmi = data[-c(1:2,4:5,18)]
data_bf = data[-c(1,3:5,17:18)]
data_density = data[-c(2:5,17:18)]
colnames(data_density)
## [1] "density" "neck" "chest" "abdomen" "waist" "hip" "thigh"
## [8] "knee" "ankle" "bicep" "forearm" "wrist"
#glimpse(data)
provide context to the qn!!! why use linear regression on full model…
\[ Percentage of Body Fat = \beta_0 + \beta_1density + \beta_2age + \beta_3weight + \beta_4height\\ + \beta_5neck + \beta_6chest + \beta_7abdomen + \beta_8waist + \beta_9hip + \beta_{10}thigh\\ + \beta_{11}knee + \beta_{12}ankle + \beta_{13}bicep + \beta_{14}forearm + \beta_{15}wrist + \epsilon \]
The residuals \(\epsilon_i\) are iid \(N(0,\sigma^2)\) and there is a linear relationship between y and x.
pbf_lm = lm(pct_bf ~ ., data)
summary(pbf_lm)
##
## Call:
## lm(formula = pct_bf ~ ., data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.3221 -0.3618 -0.1163 0.2719 14.9213
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.663e+02 2.065e+01 22.582 <2e-16 ***
## density -4.111e+02 8.517e+00 -48.270 <2e-16 ***
## age 1.337e-02 9.791e-03 1.365 0.174
## weight 5.675e-02 4.800e-02 1.182 0.238
## height -2.405e-01 2.435e-01 -0.988 0.324
## neck -1.642e-02 7.096e-02 -0.231 0.817
## chest 2.343e-02 3.306e-02 0.709 0.479
## abdomen 1.800e-02 3.317e-02 0.543 0.588
## waist NA NA NA NA
## hip 4.079e-02 4.576e-02 0.891 0.374
## thigh -2.406e-02 4.435e-02 -0.542 0.588
## knee -3.097e-02 7.469e-02 -0.415 0.679
## ankle -7.468e-02 6.650e-02 -1.123 0.263
## bicep -5.713e-02 5.160e-02 -1.107 0.269
## forearm 1.806e-02 6.267e-02 0.288 0.773
## wrist -1.060e-02 1.653e-01 -0.064 0.949
## bmi -3.414e-01 3.393e-01 -1.006 0.315
## overweightyes 1.655e-01 2.726e-01 0.607 0.544
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.278 on 233 degrees of freedom
## Multiple R-squared: 0.9778, Adjusted R-squared: 0.9763
## F-statistic: 641.3 on 16 and 233 DF, p-value: < 2.2e-16
autoplot(pbf_lm, which = 1:2) + theme_bw()
pbf_step = step(pbf_lm, direction = "backward")
## Start: AIC=138.89
## pct_bf ~ density + age + weight + height + neck + chest + abdomen +
## waist + hip + thigh + knee + ankle + bicep + forearm + wrist +
## bmi + overweight
##
##
## Step: AIC=138.89
## pct_bf ~ density + age + weight + height + neck + chest + abdomen +
## hip + thigh + knee + ankle + bicep + forearm + wrist + bmi +
## overweight
##
## Df Sum of Sq RSS AIC
## - wrist 1 0.0 380.3 136.90
## - neck 1 0.1 380.4 136.95
## - forearm 1 0.1 380.5 136.98
## - knee 1 0.3 380.6 137.08
## - thigh 1 0.5 380.8 137.21
## - abdomen 1 0.5 380.8 137.21
## - overweight 1 0.6 380.9 137.29
## - chest 1 0.8 381.2 137.43
## - hip 1 1.3 381.6 137.75
## - height 1 1.6 381.9 137.94
## - bmi 1 1.7 382.0 137.98
## - bicep 1 2.0 382.3 138.21
## - ankle 1 2.1 382.4 138.24
## - weight 1 2.3 382.6 138.39
## - age 1 3.0 383.4 138.89
## <none> 380.3 138.89
## - density 1 3803.3 4183.6 736.37
##
## Step: AIC=136.9
## pct_bf ~ density + age + weight + height + neck + chest + abdomen +
## hip + thigh + knee + ankle + bicep + forearm + bmi + overweight
##
## Df Sum of Sq RSS AIC
## - neck 1 0.1 380.4 134.97
## - forearm 1 0.1 380.5 134.98
## - knee 1 0.3 380.6 135.09
## - thigh 1 0.5 380.8 135.21
## - abdomen 1 0.5 380.8 135.22
## - overweight 1 0.6 380.9 135.29
## - chest 1 0.8 381.2 135.45
## - hip 1 1.3 381.6 135.75
## - height 1 1.6 382.0 135.96
## - bmi 1 1.7 382.0 136.01
## - bicep 1 2.0 382.4 136.22
## - ankle 1 2.2 382.6 136.37
## - weight 1 2.3 382.6 136.40
## <none> 380.3 136.90
## - age 1 3.4 383.7 137.13
## - density 1 4028.1 4408.4 747.45
##
## Step: AIC=134.97
## pct_bf ~ density + age + weight + height + chest + abdomen +
## hip + thigh + knee + ankle + bicep + forearm + bmi + overweight
##
## Df Sum of Sq RSS AIC
## - forearm 1 0.1 380.5 133.03
## - knee 1 0.3 380.7 133.14
## - abdomen 1 0.5 380.9 133.27
## - thigh 1 0.5 381.0 133.31
## - overweight 1 0.6 381.0 133.35
## - chest 1 0.8 381.3 133.53
## - hip 1 1.5 382.0 133.97
## - height 1 1.6 382.1 134.05
## - bmi 1 1.7 382.2 134.10
## - bicep 1 2.1 382.6 134.35
## - ankle 1 2.2 382.6 134.41
## - weight 1 2.2 382.7 134.43
## <none> 380.4 134.97
## - age 1 3.3 383.7 135.13
## - density 1 4169.9 4550.4 753.37
##
## Step: AIC=133.03
## pct_bf ~ density + age + weight + height + chest + abdomen +
## hip + thigh + knee + ankle + bicep + bmi + overweight
##
## Df Sum of Sq RSS AIC
## - knee 1 0.2 380.8 131.19
## - abdomen 1 0.4 381.0 131.30
## - thigh 1 0.5 381.1 131.38
## - overweight 1 0.6 381.1 131.43
## - chest 1 0.8 381.4 131.58
## - hip 1 1.5 382.0 131.99
## - height 1 1.6 382.1 132.07
## - bmi 1 1.7 382.2 132.12
## - bicep 1 2.0 382.6 132.36
## - weight 1 2.2 382.7 132.47
## - ankle 1 2.2 382.7 132.47
## <none> 380.5 133.03
## - age 1 3.3 383.8 133.17
## - density 1 4170.1 4550.7 751.39
##
## Step: AIC=131.19
## pct_bf ~ density + age + weight + height + chest + abdomen +
## hip + thigh + ankle + bicep + bmi + overweight
##
## Df Sum of Sq RSS AIC
## - abdomen 1 0.4 381.2 129.47
## - overweight 1 0.6 381.3 129.56
## - thigh 1 0.8 381.6 129.71
## - chest 1 0.9 381.7 129.76
## - hip 1 1.3 382.1 130.07
## - height 1 1.5 382.3 130.17
## - bmi 1 1.5 382.3 130.19
## - bicep 1 2.0 382.8 130.49
## - weight 1 2.0 382.8 130.51
## - ankle 1 2.7 383.5 130.94
## - age 1 3.0 383.8 131.17
## <none> 380.8 131.19
## - density 1 4170.0 4550.8 749.40
##
## Step: AIC=129.47
## pct_bf ~ density + age + weight + height + chest + hip + thigh +
## ankle + bicep + bmi + overweight
##
## Df Sum of Sq RSS AIC
## - overweight 1 0.7 381.9 127.92
## - thigh 1 0.8 382.0 128.00
## - chest 1 1.3 382.5 128.31
## - bmi 1 1.4 382.7 128.42
## - height 1 1.5 382.7 128.45
## - hip 1 1.8 383.0 128.66
## - weight 1 2.2 383.4 128.90
## - bicep 1 2.6 383.8 129.15
## <none> 381.2 129.47
## - ankle 1 3.1 384.3 129.51
## - age 1 4.4 385.6 130.35
## - density 1 6089.5 6470.7 835.40
##
## Step: AIC=127.92
## pct_bf ~ density + age + weight + height + chest + hip + thigh +
## ankle + bicep + bmi
##
## Df Sum of Sq RSS AIC
## - thigh 1 0.8 382.7 126.43
## - chest 1 1.3 383.2 126.79
## - bmi 1 1.6 383.5 126.96
## - hip 1 1.6 383.5 126.98
## - height 1 1.9 383.7 127.13
## - bicep 1 2.3 384.2 127.44
## - weight 1 2.7 384.6 127.66
## <none> 381.9 127.92
## - ankle 1 3.1 385.0 127.96
## - age 1 4.5 386.4 128.85
## - density 1 6136.5 6518.4 835.23
##
## Step: AIC=126.43
## pct_bf ~ density + age + weight + height + chest + hip + ankle +
## bicep + bmi
##
## Df Sum of Sq RSS AIC
## - hip 1 1.0 383.7 125.10
## - bmi 1 1.4 384.0 125.32
## - height 1 1.5 384.2 125.41
## - chest 1 2.0 384.6 125.71
## - weight 1 2.2 384.9 125.86
## <none> 382.7 126.43
## - ankle 1 3.2 385.9 126.50
## - bicep 1 3.2 385.9 126.51
## - age 1 6.9 389.6 128.90
## - density 1 6219.1 6601.8 836.41
##
## Step: AIC=125.1
## pct_bf ~ density + age + weight + height + chest + ankle + bicep +
## bmi
##
## Df Sum of Sq RSS AIC
## - bmi 1 0.9 384.6 123.71
## - height 1 1.2 384.9 123.87
## - chest 1 1.4 385.1 124.00
## - weight 1 2.3 386.0 124.60
## <none> 383.7 125.10
## - ankle 1 3.5 387.2 125.39
## - bicep 1 3.7 387.4 125.50
## - age 1 6.3 390.0 127.14
## - density 1 6294.6 6678.3 837.29
##
## Step: AIC=123.71
## pct_bf ~ density + age + weight + height + chest + ankle + bicep
##
## Df Sum of Sq RSS AIC
## - height 1 0.4 385.1 122.00
## - chest 1 1.1 385.7 122.41
## <none> 384.6 123.71
## - bicep 1 3.5 388.2 123.99
## - ankle 1 3.8 388.4 124.14
## - weight 1 5.0 389.7 124.96
## - age 1 6.8 391.4 126.09
## - density 1 6598.1 6982.8 846.43
##
## Step: AIC=122
## pct_bf ~ density + age + weight + chest + ankle + bicep
##
## Df Sum of Sq RSS AIC
## - chest 1 2.5 387.5 121.59
## <none> 385.1 122.00
## - bicep 1 3.2 388.3 122.06
## - ankle 1 3.7 388.8 122.42
## - weight 1 6.0 391.1 123.86
## - age 1 6.6 391.7 124.27
## - density 1 7697.2 8082.3 880.99
##
## Step: AIC=121.59
## pct_bf ~ density + age + weight + ankle + bicep
##
## Df Sum of Sq RSS AIC
## - bicep 1 3.0 390.5 121.49
## <none> 387.5 121.59
## - ankle 1 4.6 392.2 122.55
## - age 1 10.8 398.3 126.44
## - weight 1 27.6 415.1 136.77
## - density 1 8436.8 8824.3 900.95
##
## Step: AIC=121.49
## pct_bf ~ density + age + weight + ankle
##
## Df Sum of Sq RSS AIC
## <none> 390.5 121.49
## - ankle 1 4.5 395.0 122.34
## - age 1 11.5 402.0 126.74
## - weight 1 29.0 419.5 137.41
## - density 1 8435.0 8825.5 898.99
Backwards selection using the AIC dropped all variables except for age, density and abdomen which are kept in the model.
\[ Percentage of Body Fat = 442.3755 - 406.493 \times density\\ + 0.0118 \times age + 0.0576 \times abdomen\\ \] Looking at the \(R^2\) value (multiple R-squared) from the summary output, 98% of the variability of age is explained by the regression on density, age and abdomen circumference.
#options("scipen"=100, "digits"=4)
summary(pbf_step)
##
## Call:
## lm(formula = pct_bf ~ density + age + weight + ankle, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.4361 -0.3914 -0.1079 0.2276 15.3411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.565e+02 6.562e+00 69.561 < 2e-16 ***
## density -4.161e+02 5.720e+00 -72.748 < 2e-16 ***
## age 1.827e-02 6.803e-03 2.685 0.00774 **
## weight 1.927e-02 4.517e-03 4.267 2.83e-05 ***
## ankle -1.015e-01 6.047e-02 -1.678 0.09458 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.262 on 245 degrees of freedom
## Multiple R-squared: 0.9772, Adjusted R-squared: 0.9768
## F-statistic: 2625 on 4 and 245 DF, p-value: < 2.2e-16
#options("scipen"=-100, "digits"=4)
autoplot(pbf_step, which = 1:2) + theme_bw()
Due to the increasing consumptions of fast food and the increasing convenience of food deliveries, concerns about obesity level is rising throughput the world and has reached a new high. This increasing concern has lead to an increasing need to measure obesity accurately and percentage body fat is arguably the most accurate measure by far. However, the calculation of body fat is difficult and many has switched to Body Mass Index (BMI) for simpler calculation. This section is looking at comparing the results from predicting body fat percentage using other body measurements and predicting BMI using other body measurements to determine wh body measurement is the most important in determining obesity.
data = read.delim("bodyfat.txt") %>% janitor::clean_names()
glimpse(data)
## Observations: 250
## Variables: 16
## $ density <dbl> 1.0708, 1.0853, 1.0414, 1.0751, 1.0340, 1.0502, 1.0549,…
## $ pct_bf <dbl> 12.3, 6.1, 25.3, 10.4, 28.7, 20.9, 19.2, 12.4, 4.1, 11.…
## $ age <int> 23, 22, 22, 26, 24, 24, 26, 25, 25, 23, 26, 27, 32, 30,…
## $ weight <dbl> 154.25, 173.25, 154.00, 184.75, 184.25, 210.25, 181.00,…
## $ height <dbl> 67.75, 72.25, 66.25, 72.25, 71.25, 74.75, 69.75, 72.50,…
## $ neck <dbl> 36.2, 38.5, 34.0, 37.4, 34.4, 39.0, 36.4, 37.8, 38.1, 4…
## $ chest <dbl> 93.1, 93.6, 95.8, 101.8, 97.3, 104.5, 105.1, 99.6, 100.…
## $ abdomen <dbl> 85.2, 83.0, 87.9, 86.4, 100.0, 94.4, 90.7, 88.5, 82.5, …
## $ waist <dbl> 33.54331, 32.67717, 34.60630, 34.01575, 39.37008, 37.16…
## $ hip <dbl> 94.5, 98.7, 99.2, 101.2, 101.9, 107.8, 100.3, 97.1, 99.…
## $ thigh <dbl> 59.0, 58.7, 59.6, 60.1, 63.2, 66.0, 58.4, 60.0, 62.9, 6…
## $ knee <dbl> 37.3, 37.3, 38.9, 37.3, 42.2, 42.0, 38.3, 39.4, 38.3, 4…
## $ ankle <dbl> 21.9, 23.4, 24.0, 22.8, 24.0, 25.6, 22.9, 23.2, 23.8, 2…
## $ bicep <dbl> 32.0, 30.5, 28.8, 32.4, 32.2, 35.7, 31.9, 30.5, 35.9, 3…
## $ forearm <dbl> 27.4, 28.9, 25.2, 29.4, 27.7, 30.6, 27.8, 29.0, 31.1, 3…
## $ wrist <dbl> 17.1, 18.2, 16.6, 18.2, 17.7, 18.8, 17.7, 18.8, 18.2, 1…
#Introduce BMI Varaible
data=data %>% mutate(bmi=(data$weight/(data$height^2))*703,)
#Isolate the dataset only contain Body Measurements
data_bf = data[-c(1,3:5,17)]
#Isolate the dataset to only contain Body Measurements and as weight and height were included in the BMI formula, it is also removed
data_bmi = data[-c(1:5)]
qtlcharts::iplotCorr(data_bf)
## Set screen size to height=700 x width=1000
Based on the interactive correlation matrix, it can be seen the level of correlation differs quite drastically between the variables and the backward variable selection method is adopted.
bf_lm = lm(pct_bf~.,data=data_bf)
summary(bf_lm)
##
## Call:
## lm(formula = pct_bf ~ ., data = data_bf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8684 -2.9088 -0.1904 3.0491 11.1421
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.20340 6.83392 0.322 0.74742
## neck -0.45612 0.23034 -1.980 0.04882 *
## chest -0.13005 0.09197 -1.414 0.15866
## abdomen 1.03299 0.07638 13.524 < 2e-16 ***
## waist NA NA NA NA
## hip -0.33000 0.12768 -2.585 0.01034 *
## thigh 0.08793 0.13395 0.656 0.51217
## knee -0.13537 0.22744 -0.595 0.55227
## ankle 0.05505 0.21751 0.253 0.80041
## bicep 0.17762 0.17029 1.043 0.29798
## forearm 0.19468 0.20718 0.940 0.34834
## wrist -1.52499 0.50529 -3.018 0.00282 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.341 on 239 degrees of freedom
## Multiple R-squared: 0.737, Adjusted R-squared: 0.726
## F-statistic: 66.98 on 10 and 239 DF, p-value: < 2.2e-16
Using the individual p-value method, the varaibles that need to be dropped are chest, waist, thigh, knee,ankle, bicep, forearm with ankle being the first to drop down due to its high p-value. However, to double check, the AIC criterion will also be considered.
bf_step_back = step(bf_lm, direction = "backward",trace = FALSE)
summary(bf_step_back)
##
## Call:
## lm(formula = pct_bf ~ neck + chest + abdomen + hip + bicep +
## wrist, data = data_bf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.668 -2.889 -0.361 3.210 11.148
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.52703 6.63727 0.230 0.818232
## neck -0.39650 0.22234 -1.783 0.075783 .
## chest -0.12810 0.08992 -1.425 0.155562
## abdomen 1.01805 0.07431 13.700 < 2e-16 ***
## hip -0.28758 0.09232 -3.115 0.002060 **
## bicep 0.26094 0.15160 1.721 0.086469 .
## wrist -1.55084 0.45510 -3.408 0.000767 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.32 on 243 degrees of freedom
## Multiple R-squared: 0.7353, Adjusted R-squared: 0.7287
## F-statistic: 112.5 on 6 and 243 DF, p-value: < 2.2e-16
Based on the backward selection model, the fitted model has become:
$ = 1.52 -0.3965neck - 0.128chest + 1.01805abdomen -0.28758hip + 0.26bicep -1.55084wrist $
Finally, to check assumption, we perform the ggfortify function.
par(mfrow=c(1,2))
plot(bf_step_back,which=1:2) + theme_bw()
## NULL
The QQ plot shows a straight line which indicates that the normality assumption is reasonable. However, the residuals vs fitted plot shows a slight variation; but given that body fat is hard to predict, this is acceptable.
$ = 1.52 -0.3965neck - 0.128chest + 1.01805abdomen -0.28758hip + 0.26bicep -1.55084wrist $
For this analysis, the formula of BMI is \(BMI = \frac{Weight (lbs)*703}{Height(in)^2}\)
qtlcharts::iplotCorr(data_bmi)
Based on the interactive correlation matrix, it can be seen the level of correlation differs quite drastically between the variables and the backward variable selection method is adopted.
bmi_lm = lm(bmi~.,data=data_bmi)
summary(bmi_lm)
##
## Call:
## lm(formula = bmi ~ ., data = data_bmi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1538 -0.6529 0.0036 0.6464 3.7589
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.337205 1.667286 -6.800 8.32e-11 ***
## neck 0.031220 0.056196 0.556 0.579
## chest 0.148829 0.022439 6.633 2.18e-10 ***
## abdomen 0.130813 0.018636 7.020 2.29e-11 ***
## waist NA NA NA NA
## hip 0.048917 0.031149 1.570 0.118
## thigh 0.135537 0.032679 4.147 4.67e-05 ***
## knee -0.253557 0.055488 -4.570 7.84e-06 ***
## ankle 0.056067 0.053066 1.057 0.292
## bicep 0.051276 0.041545 1.234 0.218
## forearm 0.076917 0.050545 1.522 0.129
## wrist 0.005644 0.123276 0.046 0.964
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.059 on 239 degrees of freedom
## Multiple R-squared: 0.9039, Adjusted R-squared: 0.8998
## F-statistic: 224.7 on 10 and 239 DF, p-value: < 2.2e-16
Using the individual p-value method, the varaibles that need to be dropped are hip, ankle, bicep, forearm and wrist. To double check, the AIC criterion will also be considered.
bmi_step_back = step(bmi_lm, direction = "backward",trace = FALSE)
summary(bmi_step_back)
##
## Call:
## lm(formula = bmi ~ chest + abdomen + hip + thigh + knee + forearm,
## data = data_bmi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1197 -0.6944 -0.0274 0.6831 3.8464
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.94257 1.43829 -7.608 6.10e-13 ***
## chest 0.16090 0.02122 7.582 7.18e-13 ***
## abdomen 0.12726 0.01826 6.968 3.01e-11 ***
## hip 0.05047 0.03084 1.637 0.1030
## thigh 0.14983 0.03032 4.942 1.44e-06 ***
## knee -0.23116 0.05148 -4.490 1.10e-05 ***
## forearm 0.11484 0.04468 2.571 0.0108 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.058 on 243 degrees of freedom
## Multiple R-squared: 0.9024, Adjusted R-squared: 0.9
## F-statistic: 374.6 on 6 and 243 DF, p-value: < 2.2e-16
Based on the backward selection model, the fitted model has become:
$ = -10.94 +0.161chest + 0.127abdomen + 0.050hip + 0.150 thigh - 0.23knee + 0.115forearm $
Finally, to check assumption, we perform the ggfortify function.
par(mfrow=c(1,2))
plot(bmi_step_back,which=1:2) + theme_bw()
## NULL
The QQ plot shows a straight line which indicates that the normality assumption is reasonable. However, the residuals vs fitted plot shows a fan shaped plot which indicates that the assumption of homogeneous variance is violated. We can use a log transformed response and re-fit the linear regression.
ln_bmi_lm = lm(log(bmi)~.,data=data_bmi)
summary(ln_bmi_lm)
##
## Call:
## lm(formula = log(bmi) ~ ., data = data_bmi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.131845 -0.026047 0.000653 0.027572 0.107675
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.7820756 0.0634046 28.106 < 2e-16 ***
## neck 0.0017372 0.0021370 0.813 0.417073
## chest 0.0054462 0.0008533 6.382 8.98e-10 ***
## abdomen 0.0051190 0.0007087 7.223 6.76e-12 ***
## waist NA NA NA NA
## hip 0.0004404 0.0011846 0.372 0.710402
## thigh 0.0061100 0.0012427 4.917 1.64e-06 ***
## knee -0.0076085 0.0021101 -3.606 0.000379 ***
## ankle 0.0020730 0.0020180 1.027 0.305333
## bicep 0.0025240 0.0015799 1.598 0.111461
## forearm 0.0033017 0.0019222 1.718 0.087150 .
## wrist 0.0009228 0.0046880 0.197 0.844112
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04028 on 239 degrees of freedom
## Multiple R-squared: 0.9066, Adjusted R-squared: 0.9027
## F-statistic: 232 on 10 and 239 DF, p-value: < 2.2e-16
ln_bmi_step_back = step(ln_bmi_lm, direction = "backward",trace = FALSE)
summary(ln_bmi_step_back)
##
## Call:
## lm(formula = log(bmi) ~ chest + abdomen + thigh + knee + bicep +
## forearm, data = data_bmi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.129144 -0.024844 0.000147 0.028553 0.111637
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.8276641 0.0495199 36.908 < 2e-16 ***
## chest 0.0057533 0.0008218 7.001 2.47e-11 ***
## abdomen 0.0051792 0.0006497 7.972 6.10e-14 ***
## thigh 0.0064286 0.0009988 6.436 6.48e-10 ***
## knee -0.0064618 0.0018831 -3.431 0.000705 ***
## bicep 0.0028200 0.0015464 1.824 0.069436 .
## forearm 0.0039923 0.0018360 2.174 0.030638 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04015 on 243 degrees of freedom
## Multiple R-squared: 0.9056, Adjusted R-squared: 0.9033
## F-statistic: 388.7 on 6 and 243 DF, p-value: < 2.2e-16
par(mfrow=c(1,2))
plot(ln_bmi_step_back,which=1:2) + theme_bw()
## NULL
sjPlot::tab_model(bmi_step_back, ln_bmi_step_back, digits = 5, show.ci = FALSE)
| bmi | log(bmi) | |||
|---|---|---|---|---|
| Predictors | Estimates | p | Estimates | p |
| (Intercept) | -10.94257 | <0.001 | 1.82766 | <0.001 |
| chest | 0.16090 | <0.001 | 0.00575 | <0.001 |
| abdomen | 0.12726 | <0.001 | 0.00518 | <0.001 |
| hip | 0.05047 | 0.103 | ||
| thigh | 0.14983 | <0.001 | 0.00643 | <0.001 |
| knee | -0.23116 | <0.001 | -0.00646 | 0.001 |
| forearm | 0.11484 | 0.011 | 0.00399 | 0.031 |
| bicep | 0.00282 | 0.069 | ||
| Observations | 250 | 250 | ||
| R2 / R2 adjusted | 0.902 / 0.900 | 0.906 / 0.903 | ||
$log() = 1.83 +0.0058chest + 0.0052abdomen + 0.0064 thigh -0.0065knee + 0.0028bicep + 0.0040 forearm $.
sjPlot::tab_model(bf_step_back, ln_bmi_step_back, digits = 5, show.ci = FALSE)
| pct bf | log(bmi) | |||
|---|---|---|---|---|
| Predictors | Estimates | p | Estimates | p |
| (Intercept) | 1.52703 | 0.818 | 1.82766 | <0.001 |
| neck | -0.39650 | 0.076 | ||
| chest | -0.12810 | 0.156 | 0.00575 | <0.001 |
| abdomen | 1.01805 | <0.001 | 0.00518 | <0.001 |
| hip | -0.28758 | 0.002 | ||
| bicep | 0.26094 | 0.086 | 0.00282 | 0.069 |
| wrist | -1.55084 | 0.001 | ||
| thigh | 0.00643 | <0.001 | ||
| knee | -0.00646 | 0.001 | ||
| forearm | 0.00399 | 0.031 | ||
| Observations | 250 | 250 | ||
| R2 / R2 adjusted | 0.735 / 0.729 | 0.906 / 0.903 | ||
Through looking at the two models, we can see that using simply body measurements, it is easier to predict changes in bmi rather than percentage body fat. From the models, measurements of abdomen appears to have the greatest influence on both body fat and bmi and hence any increase should be treated with caution.
Since the body density is calculated based on weight, height, neck and other variables below in our data set. It is better not include it in our full model. \[ Body Weight = \beta_0 + \beta_1age + \beta_2percentageofbodyfat + \beta_3height\\ + \beta_4neck + \beta_5chest + \beta_6abdomen + \beta_7waist + \beta_8hip + \beta_{9}thigh\\ + \beta_{10}knee + \beta_{11}ankle + \beta_{12}bicep + \beta_{13}forearm + \beta_{14}wrist + \epsilon \] ##### 2.2.2 Check Assumptions: The residuals \(\epsilon_i\) are iid \(N(0,\sigma^2)\) and there is a linear relationship between y and x.
dataqf1=within(data,rm(density))
weight_lm = lm(weight ~ ., dataqf1)
summary(weight_lm)
##
## Call:
## lm(formula = weight ~ ., data = dataqf1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.8622 -0.8906 -0.1431 0.7716 6.0540
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.528e+02 3.701e+00 -95.334 < 2e-16 ***
## pct_bf -5.562e-02 2.699e-02 -2.061 0.04038 *
## age -5.065e-04 1.352e-02 -0.037 0.97015
## height 4.808e+00 1.047e-01 45.925 < 2e-16 ***
## neck 1.407e-01 9.742e-02 1.444 0.15006
## chest 6.062e-02 4.536e-02 1.336 0.18269
## abdomen 4.711e-02 4.505e-02 1.046 0.29677
## waist NA NA NA NA
## hip -5.715e-02 6.253e-02 -0.914 0.36163
## thigh 1.245e-01 6.056e-02 2.055 0.04096 *
## knee 2.936e-01 1.010e-01 2.906 0.00401 **
## ankle -1.031e-02 9.160e-02 -0.113 0.91044
## bicep 1.198e-01 7.034e-02 1.703 0.08996 .
## forearm -3.882e-02 8.635e-02 -0.450 0.65346
## wrist -1.409e-02 2.268e-01 -0.062 0.95053
## bmi 6.440e+00 1.980e-01 32.522 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.763 on 235 degrees of freedom
## Multiple R-squared: 0.996, Adjusted R-squared: 0.9957
## F-statistic: 4167 on 14 and 235 DF, p-value: < 2.2e-16
autoplot(weight_lm, which = 1:2) + theme_bw()
In the plot above the residuals are above zero from the beginning, then they go below zero and end up again above zero for the end. This means the linearity assumption fails. We underestimate the weight variable at the start and the end and overestimate the weight at medium. To address this problem, I transform the weight to sqrt(weight).
dataqf=dataqf1%>%mutate(weight=weight^(1/2))
weight_lmqf = lm(weight ~ ., dataqf)
summary(weight_lmqf)
##
## Call:
## lm(formula = weight ~ ., data = dataqf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31815 -0.01805 0.00782 0.02789 0.09803
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.637e+00 1.073e-01 -61.872 < 2e-16 ***
## pct_bf -3.807e-05 7.823e-04 -0.049 0.9612
## age 2.271e-04 3.920e-04 0.579 0.5629
## height 1.810e-01 3.035e-03 59.648 < 2e-16 ***
## neck 5.860e-03 2.824e-03 2.075 0.0391 *
## chest 1.839e-03 1.315e-03 1.399 0.1633
## abdomen 1.154e-03 1.306e-03 0.884 0.3778
## waist NA NA NA NA
## hip -4.131e-03 1.813e-03 -2.279 0.0236 *
## thigh 6.982e-03 1.755e-03 3.977 9.28e-05 ***
## knee 1.180e-02 2.929e-03 4.027 7.61e-05 ***
## ankle 7.045e-04 2.655e-03 0.265 0.7910
## bicep 4.494e-03 2.039e-03 2.204 0.0285 *
## forearm 1.553e-03 2.503e-03 0.621 0.5354
## wrist 3.707e-03 6.576e-03 0.564 0.5734
## bmi 2.358e-01 5.740e-03 41.080 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0511 on 235 degrees of freedom
## Multiple R-squared: 0.9976, Adjusted R-squared: 0.9974
## F-statistic: 6901 on 14 and 235 DF, p-value: < 2.2e-16
autoplot(weight_lmqf, which = 1:2) + theme_bw()
qtlcharts::iplotCorr(dataqf)
It seems there is no direct linear relationship between age and weight. I will do further reserch to work out the appropriate model. ##### 2.2.3 Dropping and adding variables using the AIC starting from the full model
M0 = lm(weight ~ 1, data = dataqf)
step.fwd.aic = step(M0, scope = list(lower = M0, upper = weight_lmqf), direction = "forward", trace = FALSE)
step.back.aic = step(weight_lmqf, direction = "backward", trace = FALSE)
summary(step.back.aic)
##
## Call:
## lm(formula = weight ~ height + neck + chest + hip + thigh + knee +
## bicep + bmi, data = dataqf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32442 -0.01876 0.00895 0.02952 0.10609
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.627483 0.093473 -70.903 < 2e-16 ***
## height 0.181653 0.002686 67.629 < 2e-16 ***
## neck 0.007072 0.002562 2.760 0.006221 **
## chest 0.002300 0.001224 1.879 0.061438 .
## hip -0.003906 0.001744 -2.240 0.025983 *
## thigh 0.006039 0.001580 3.823 0.000168 ***
## knee 0.013245 0.002696 4.913 1.65e-06 ***
## bicep 0.004560 0.001898 2.402 0.017048 *
## bmi 0.238754 0.005008 47.671 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05079 on 241 degrees of freedom
## Multiple R-squared: 0.9975, Adjusted R-squared: 0.9975
## F-statistic: 1.222e+04 on 8 and 241 DF, p-value: < 2.2e-16
summary(step.fwd.aic)
##
## Call:
## lm(formula = weight ~ hip + neck + height + bmi + knee + thigh +
## bicep + chest, data = dataqf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32442 -0.01876 0.00895 0.02952 0.10609
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.627483 0.093473 -70.903 < 2e-16 ***
## hip -0.003906 0.001744 -2.240 0.025983 *
## neck 0.007072 0.002562 2.760 0.006221 **
## height 0.181653 0.002686 67.629 < 2e-16 ***
## bmi 0.238754 0.005008 47.671 < 2e-16 ***
## knee 0.013245 0.002696 4.913 1.65e-06 ***
## thigh 0.006039 0.001580 3.823 0.000168 ***
## bicep 0.004560 0.001898 2.402 0.017048 *
## chest 0.002300 0.001224 1.879 0.061438 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05079 on 241 degrees of freedom
## Multiple R-squared: 0.9975, Adjusted R-squared: 0.9975
## F-statistic: 1.222e+04 on 8 and 241 DF, p-value: < 2.2e-16
Both forward and backward search using AIC give the same result. ##### 2.2.4 Fitted model for the model selected by the step-wise procedure. \[
sqrt(Weight) = -5.547979 + 0.030800 \times hip+ 0.032385 \times neck + 0.076563 \times height\\
+ 0.031738 \times chest + 0.017835 \times thigh +0.019509 \times abdomen + 0.023096 \times ankle\\
+ 0.018840 \times bicep +0.017246 \times forearm + 0.050826 \times wrist - 0.003182 \times age + 0.018287
\times knee
\] Looking at the \(R^2\) value (multiple R-squared) from the summary output, 98% of the variability of age is explained by the regression on hip, neck, height, chest, thigh, abdomen, ankle, bicep, firearm, wrist, age and knee circumference.
\[ Body Density = \beta_0 + \beta_1Pcf.BF + \beta_2Age + \beta_3Weight + \beta_4Height\\ + \beta_5Neck + \beta_6Chest + \beta_7Abdomen + \beta_8Waist + \beta_9Hip + \beta_{10}Thigh\\ + \beta_{11}Knee + \beta_{12}Ankle + \beta_{13}Bicep + \beta_{14}Forearm + \beta_{15}Wrist + \epsilon \]
#data1<-data_density[,-2]
cor_matrix <- cor(data_density)
pheatmap(cor_matrix, display_numbers = T,na.rm=T)
The residuals \(\epsilon_i\) are iid \(N(0,\sigma^2)\) and there is a linear relationship between y and x.
M0 <- lm(density ~ 1, data = data_density) # Null model
M1 <- lm(density ~ ., data = data_density) # Full model
autoplot(M1,which=1:2)+theme_bw()
round(summary(M1)$coef, 3)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.082 0.016 68.068 0.000
## neck 0.001 0.001 2.074 0.039
## chest 0.000 0.000 1.818 0.070
## abdomen -0.002 0.000 -13.645 0.000
## hip 0.001 0.000 2.950 0.003
## thigh 0.000 0.000 -0.980 0.328
## knee 0.000 0.001 0.689 0.492
## ankle 0.000 0.001 -0.675 0.500
## bicep -0.001 0.000 -1.357 0.176
## forearm 0.000 0.000 -0.947 0.345
## wrist 0.004 0.001 3.310 0.001
step.fwd.aic <- step(M0, scope = list(lower = M0, upper = M1), direction = "forward", trace = FALSE)
summary(step.fwd.aic)
##
## Call:
## lm(formula = density ~ waist + wrist + hip + chest + bicep +
## neck, data = data_density)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.024142 -0.007680 0.000523 0.006156 0.038390
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0844660 0.0154768 70.070 < 2e-16 ***
## waist -0.0060402 0.0004401 -13.724 < 2e-16 ***
## wrist 0.0038812 0.0010612 3.657 0.000312 ***
## hip 0.0006990 0.0002153 3.247 0.001331 **
## chest 0.0003881 0.0002097 1.851 0.065427 .
## bicep -0.0007779 0.0003535 -2.201 0.028695 *
## neck 0.0009609 0.0005185 1.853 0.065030 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01007 on 243 degrees of freedom
## Multiple R-squared: 0.722, Adjusted R-squared: 0.7152
## F-statistic: 105.2 on 6 and 243 DF, p-value: < 2.2e-16
step.back.aic <- step(M1, scope = list(lower = M0, upper = M1), direction = "backward", trace = FALSE)
summary(step.back.aic)
##
## Call:
## lm(formula = density ~ neck + chest + abdomen + hip + bicep +
## wrist, data = data_density)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.024142 -0.007680 0.000523 0.006156 0.038390
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0844660 0.0154768 70.070 < 2e-16 ***
## neck 0.0009609 0.0005185 1.853 0.065030 .
## chest 0.0003881 0.0002097 1.851 0.065428 .
## abdomen -0.0023780 0.0001733 -13.724 < 2e-16 ***
## hip 0.0006990 0.0002153 3.247 0.001331 **
## bicep -0.0007779 0.0003535 -2.201 0.028694 *
## wrist 0.0038812 0.0010612 3.657 0.000312 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01007 on 243 degrees of freedom
## Multiple R-squared: 0.722, Adjusted R-squared: 0.7152
## F-statistic: 105.2 on 6 and 243 DF, p-value: < 2.2e-16
exh <- regsubsets(density~., data = data_density, nvmax = 15)
## Warning in leaps.exhaustive(a, really.big): XHAUST returned error code -999
plot(exh,scale="bic")
M2<- lm(formula = density ~ neck + chest + abdomen,
data = data_density)
summary(M2)
##
## Call:
## lm(formula = density ~ neck + chest + abdomen, data = data_density)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.029395 -0.007156 -0.000682 0.007305 0.046687
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1392164 0.0119499 95.333 < 2e-16 ***
## neck 0.0017212 0.0004599 3.743 0.000226 ***
## chest 0.0004569 0.0002135 2.140 0.033361 *
## abdomen -0.0021095 0.0001592 -13.250 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01057 on 246 degrees of freedom
## Multiple R-squared: 0.6904, Adjusted R-squared: 0.6867
## F-statistic: 182.9 on 3 and 246 DF, p-value: < 2.2e-16
M3<- lm(formula = density ~ neck + chest + abdomen + waist ,
data = data_density)
summary(M3)
##
## Call:
## lm(formula = density ~ neck + chest + abdomen + waist, data = data_density)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.029395 -0.007156 -0.000682 0.007305 0.046687
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1392164 0.0119499 95.333 < 2e-16 ***
## neck 0.0017212 0.0004599 3.743 0.000226 ***
## chest 0.0004569 0.0002135 2.140 0.033361 *
## abdomen -0.0021095 0.0001592 -13.250 < 2e-16 ***
## waist NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01057 on 246 degrees of freedom
## Multiple R-squared: 0.6904, Adjusted R-squared: 0.6867
## F-statistic: 182.9 on 3 and 246 DF, p-value: < 2.2e-16
Drop waist and add other variables
M4<- lm(formula = density ~ neck + chest + abdomen + hip ,
data = data_density)
summary(M4)
##
## Call:
## lm(formula = density ~ neck + chest + abdomen + hip, data = data_density)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.028989 -0.007256 0.000047 0.006767 0.045116
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1150223 0.0139695 79.818 < 2e-16 ***
## neck 0.0014682 0.0004584 3.203 0.00154 **
## chest 0.0003734 0.0002113 1.768 0.07837 .
## abdomen -0.0023671 0.0001759 -13.455 < 2e-16 ***
## hip 0.0006619 0.0002074 3.191 0.00160 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01037 on 245 degrees of freedom
## Multiple R-squared: 0.7028, Adjusted R-squared: 0.6979
## F-statistic: 144.8 on 4 and 245 DF, p-value: < 2.2e-16
M5<- lm(formula = density ~ neck + chest + abdomen + hip + thigh ,
data = data_density)
summary(M5)
##
## Call:
## lm(formula = density ~ neck + chest + abdomen + hip + thigh,
## data = data_density)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.029545 -0.006988 0.000516 0.007098 0.043367
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1073577 0.0144127 76.832 < 2e-16 ***
## neck 0.0016460 0.0004644 3.544 0.000472 ***
## chest 0.0003393 0.0002107 1.610 0.108626
## abdomen -0.0023869 0.0001752 -13.627 < 2e-16 ***
## hip 0.0010639 0.0002889 3.682 0.000285 ***
## thigh -0.0005716 0.0002878 -1.986 0.048171 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01031 on 244 degrees of freedom
## Multiple R-squared: 0.7075, Adjusted R-squared: 0.7015
## F-statistic: 118 on 5 and 244 DF, p-value: < 2.2e-16
Drop chest and add other variables
M6<- lm(formula = density ~ neck + abdomen + hip + thigh + knee ,
data = data_density)
summary(M6)
##
## Call:
## lm(formula = density ~ neck + abdomen + hip + thigh + knee, data = data_density)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.029361 -0.007760 0.000360 0.007152 0.043816
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1035095 0.0150104 73.517 < 2e-16 ***
## neck 0.0018149 0.0004395 4.129 5e-05 ***
## abdomen -0.0022098 0.0001347 -16.402 < 2e-16 ***
## hip 0.0010035 0.0002984 3.363 0.000894 ***
## thigh -0.0007038 0.0002938 -2.395 0.017355 *
## knee 0.0007551 0.0005006 1.508 0.132778
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01032 on 244 degrees of freedom
## Multiple R-squared: 0.7071, Adjusted R-squared: 0.7011
## F-statistic: 117.8 on 5 and 244 DF, p-value: < 2.2e-16
Drop knee
M7<- lm(formula = density ~ neck + abdomen + hip + thigh ,
data = data_density)
summary(M7)
##
## Call:
## lm(formula = density ~ neck + abdomen + hip + thigh, data = data_density)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.030204 -0.007301 0.000653 0.007199 0.045107
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1104052 0.0143343 77.465 < 2e-16 ***
## neck 0.0019085 0.0004362 4.375 1.8e-05 ***
## abdomen -0.0022064 0.0001351 -16.337 < 2e-16 ***
## hip 0.0011314 0.0002868 3.945 0.000104 ***
## thigh -0.0006094 0.0002878 -2.117 0.035236 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01035 on 245 degrees of freedom
## Multiple R-squared: 0.7044, Adjusted R-squared: 0.6996
## F-statistic: 146 on 4 and 245 DF, p-value: < 2.2e-16
relweights <- function(fit,...){
R <- cor(fit$model)
nvar <- ncol(R)
rxx <- R[2:nvar, 2:nvar]
rxy <- R[2:nvar, 1]
svd <- eigen(rxx)
evec <- svd$vectors
ev <- svd$values
delta <- diag(sqrt(ev))
lambda <- evec %*% delta %*% t(evec)
lambdasq <- lambda ^ 2
beta <- solve(lambda) %*% rxy
rsquare <- colSums(beta ^ 2)
rawwgt <- lambdasq %*% beta ^ 2
import <- (rawwgt / rsquare) * 100
import <- as.data.frame(import)
row.names(import) <- names(fit$model[2:nvar])
names(import) <- "Weights"
import <- import[order(import),1, drop=FALSE]
dotchart(import$Weights, labels=row.names(import),
xlab="% of R-Square", pch=19,
main="Relative Importance of Predictor Variables",
sub=paste("Total R-Square=", round(rsquare, digits=3)),
...)
return(import)
}
relweights(M7, col="blue")
## Weights
## neck 11.06366
## thigh 13.82063
## hip 19.73563
## abdomen 55.38008
Obviously, abdomen contributes the most in the relationship with body density.
autoplot(M7,which=1:2)+theme_bw()
Fit a simple linear regression to the data to assess whether the age has an influence on the percentage of body fat. Taking the log of % of body fat improves the fit by altering the scale and making the variable more “normally” distributed. \[ X = \beta_0 + \beta_1log(Y) + \epsilon \]
p = data %>% ggplot() + aes(x = age, y = pct_bf) + geom_point() +
geom_smooth(method = "lm", se = FALSE) + theme_bw() +
scale_y_continuous(labels = scales::number) +
scale_x_continuous(labels = scales::number) +
labs(x = "Age", y = "Body Fat Percentage", title = "Proportion of Body Fat Percentage based on Age", fill = "Percentage of Body Fat", caption = "Source: SOCR Data BMI Regression") +
scale_y_log10()
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
p
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
data.lm = lm(log1p(pct_bf) ~ age, data)
summary(data.lm)
##
## Call:
## lm(formula = log1p(pct_bf) ~ age, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8296 -0.2566 0.1179 0.3431 0.9251
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.368207 0.118541 19.978 < 2e-16 ***
## age 0.011535 0.002542 4.537 8.88e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5076 on 248 degrees of freedom
## Multiple R-squared: 0.07665, Adjusted R-squared: 0.07293
## F-statistic: 20.59 on 1 and 248 DF, p-value: 8.883e-06
A one year increase in age would lead to a 1.15% increase in percentage of body fat.
predict(data.lm, data = data.frame(x = 50), interval = "prediction", level = 0.95)
## Warning in predict.lm(data.lm, data = data.frame(x = 50), interval = "prediction", : predictions on current data refer to _future_ responses
## fit lwr upr
## 1 2.633523 1.625837 3.641209
## 2 2.621988 1.613745 3.630231
## 3 2.621988 1.613745 3.630231
## 4 2.668130 1.661966 3.674293
## 5 2.645059 1.637905 3.652212
## 6 2.645059 1.637905 3.652212
## 7 2.668130 1.661966 3.674293
## 8 2.656594 1.649948 3.663240
## 9 2.656594 1.649948 3.663240
## 10 2.633523 1.625837 3.641209
## 11 2.668130 1.661966 3.674293
## 12 2.679665 1.673960 3.685370
## 13 2.737342 1.733557 3.741128
## 14 2.714271 1.709793 3.718750
## 15 2.771949 1.769017 3.774881
## 16 2.771949 1.769017 3.774881
## 17 2.760413 1.757222 3.763605
## 18 2.737342 1.733557 3.741128
## 19 2.691201 1.685929 3.696472
## 20 2.748878 1.745402 3.752354
## 21 2.691201 1.685929 3.696472
## 22 2.691201 1.685929 3.696472
## 23 2.725807 1.721687 3.729927
## 24 2.737342 1.733557 3.741128
## 25 2.691201 1.685929 3.696472
## 26 2.679665 1.673960 3.685370
## 27 2.760413 1.757222 3.763605
## 28 2.725807 1.721687 3.729927
## 29 2.679665 1.673960 3.685370
## 30 2.702736 1.697873 3.707599
## 31 2.737342 1.733557 3.741128
## 32 2.702736 1.697873 3.707599
## 33 2.679665 1.673960 3.685370
## 34 2.841162 1.839263 3.843061
## 35 2.841162 1.839263 3.843061
## 36 2.933445 1.931523 3.935368
## 37 2.829626 1.827618 3.831635
## 38 2.944981 1.942943 3.947019
## 39 2.944981 1.942943 3.947019
## 40 2.887304 1.885593 3.889014
## 41 2.921910 1.920078 3.923742
## 42 2.841162 1.839263 3.843061
## 43 2.818091 1.815947 3.820234
## 44 2.864233 1.862478 3.865987
## 45 2.829626 1.827618 3.831635
## 46 2.818091 1.815947 3.820234
## 47 2.887304 1.885593 3.889014
## 48 2.910375 1.908608 3.912141
## 49 2.910375 1.908608 3.912141
## 50 2.829626 1.827618 3.831635
## 51 2.956516 1.954338 3.958695
## 52 2.933445 1.931523 3.935368
## 53 2.852697 1.850883 3.854511
## 54 2.991123 1.988373 3.993873
## 55 3.037265 2.033404 4.041126
## 56 3.083407 2.078037 4.088777
## 57 2.991123 1.988373 3.993873
## 58 3.071871 2.066916 4.076827
## 59 3.083407 2.078037 4.088777
## 60 3.014194 2.010938 4.017449
## 61 2.991123 1.988373 3.993873
## 62 3.071871 2.066916 4.076827
## 63 3.025729 2.022184 4.029275
## 64 3.002658 1.999668 4.005648
## 65 2.991123 1.988373 3.993873
## 66 3.002658 1.999668 4.005648
## 67 2.991123 1.988373 3.993873
## 68 3.002658 1.999668 4.005648
## 69 3.083407 2.078037 4.088777
## 70 3.002658 1.999668 4.005648
## 71 3.014194 2.010938 4.017449
## 72 3.002658 1.999668 4.005648
## 73 3.071871 2.066916 4.076827
## 74 3.071871 2.066916 4.076827
## 75 3.025729 2.022184 4.029275
## 76 3.164155 2.155192 4.173118
## 77 3.302581 2.284677 4.320485
## 78 3.129549 2.122273 4.136824
## 79 3.141084 2.133271 4.148897
## 80 3.106478 2.100204 4.112751
## 81 3.106478 2.100204 4.112751
## 82 3.175690 2.166116 4.185265
## 83 3.198761 2.187891 4.209632
## 84 3.141084 2.133271 4.148897
## 85 3.198761 2.187891 4.209632
## 86 3.106478 2.100204 4.112751
## 87 2.898839 1.897113 3.900565
## 88 2.921910 1.920078 3.923742
## 89 2.898839 1.897113 3.900565
## 90 2.875768 1.874048 3.877488
## 91 2.910375 1.908608 3.912141
## 92 2.898839 1.897113 3.900565
## 93 2.910375 1.908608 3.912141
## 94 2.979587 1.977053 3.982122
## 95 2.806555 1.804252 3.808858
## 96 2.944981 1.942943 3.947019
## 97 2.898839 1.897113 3.900565
## 98 2.910375 1.908608 3.912141
## 99 2.933445 1.931523 3.935368
## 100 2.921910 1.920078 3.923742
## 101 2.841162 1.839263 3.843061
## 102 2.933445 1.931523 3.935368
## 103 2.864233 1.862478 3.865987
## 104 2.864233 1.862478 3.865987
## 105 2.864233 1.862478 3.865987
## 106 2.968052 1.965708 3.970396
## 107 2.864233 1.862478 3.865987
## 108 2.829626 1.827618 3.831635
## 109 2.864233 1.862478 3.865987
## 110 2.864233 1.862478 3.865987
## 111 2.910375 1.908608 3.912141
## 112 2.852697 1.850883 3.854511
## 113 2.921910 1.920078 3.923742
## 114 2.829626 1.827618 3.831635
## 115 2.921910 1.920078 3.923742
## 116 2.956516 1.954338 3.958695
## 117 2.829626 1.827618 3.831635
## 118 2.875768 1.874048 3.877488
## 119 2.968052 1.965708 3.970396
## 120 2.875768 1.874048 3.877488
## 121 2.829626 1.827618 3.831635
## 122 2.910375 1.908608 3.912141
## 123 2.944981 1.942943 3.947019
## 124 2.898839 1.897113 3.900565
## 125 2.852697 1.850883 3.854511
## 126 2.864233 1.862478 3.865987
## 127 2.829626 1.827618 3.831635
## 128 2.852697 1.850883 3.854511
## 129 2.933445 1.931523 3.935368
## 130 2.829626 1.827618 3.831635
## 131 2.910375 1.908608 3.912141
## 132 2.944981 1.942943 3.947019
## 133 2.841162 1.839263 3.843061
## 134 2.875768 1.874048 3.877488
## 135 2.818091 1.815947 3.820234
## 136 2.864233 1.862478 3.865987
## 137 2.829626 1.827618 3.831635
## 138 2.933445 1.931523 3.935368
## 139 2.829626 1.827618 3.831635
## 140 2.829626 1.827618 3.831635
## 141 2.968052 1.965708 3.970396
## 142 2.633523 1.625837 3.641209
## 143 2.633523 1.625837 3.641209
## 144 2.645059 1.637905 3.652212
## 145 2.645059 1.637905 3.652212
## 146 2.656594 1.649948 3.663240
## 147 2.656594 1.649948 3.663240
## 148 2.668130 1.661966 3.674293
## 149 2.668130 1.661966 3.674293
## 150 2.668130 1.661966 3.674293
## 151 2.679665 1.673960 3.685370
## 152 2.679665 1.673960 3.685370
## 153 2.679665 1.673960 3.685370
## 154 2.691201 1.685929 3.696472
## 155 2.691201 1.685929 3.696472
## 156 2.691201 1.685929 3.696472
## 157 2.714271 1.709793 3.718750
## 158 2.725807 1.721687 3.729927
## 159 2.725807 1.721687 3.729927
## 160 2.748878 1.745402 3.752354
## 161 2.748878 1.745402 3.752354
## 162 2.760413 1.757222 3.763605
## 163 2.760413 1.757222 3.763605
## 164 2.771949 1.769017 3.774881
## 165 2.771949 1.769017 3.774881
## 166 2.771949 1.769017 3.774881
## 167 2.771949 1.769017 3.774881
## 168 2.771949 1.769017 3.774881
## 169 2.771949 1.769017 3.774881
## 170 2.771949 1.769017 3.774881
## 171 2.771949 1.769017 3.774881
## 172 2.783484 1.780787 3.786182
## 173 2.783484 1.780787 3.786182
## 174 2.795020 1.792532 3.797508
## 175 2.795020 1.792532 3.797508
## 176 2.795020 1.792532 3.797508
## 177 2.806555 1.804252 3.808858
## 178 2.818091 1.815947 3.820234
## 179 2.818091 1.815947 3.820234
## 180 2.829626 1.827618 3.831635
## 181 2.829626 1.827618 3.831635
## 182 2.829626 1.827618 3.831635
## 183 2.829626 1.827618 3.831635
## 184 2.829626 1.827618 3.831635
## 185 2.841162 1.839263 3.843061
## 186 2.841162 1.839263 3.843061
## 187 2.841162 1.839263 3.843061
## 188 2.841162 1.839263 3.843061
## 189 2.841162 1.839263 3.843061
## 190 2.852697 1.850883 3.854511
## 191 2.852697 1.850883 3.854511
## 192 2.852697 1.850883 3.854511
## 193 2.852697 1.850883 3.854511
## 194 2.852697 1.850883 3.854511
## 195 2.852697 1.850883 3.854511
## 196 2.852697 1.850883 3.854511
## 197 2.852697 1.850883 3.854511
## 198 2.864233 1.862478 3.865987
## 199 2.864233 1.862478 3.865987
## 200 2.864233 1.862478 3.865987
## 201 2.864233 1.862478 3.865987
## 202 2.875768 1.874048 3.877488
## 203 2.875768 1.874048 3.877488
## 204 2.875768 1.874048 3.877488
## 205 2.875768 1.874048 3.877488
## 206 2.910375 1.908608 3.912141
## 207 2.910375 1.908608 3.912141
## 208 2.910375 1.908608 3.912141
## 209 2.933445 1.931523 3.935368
## 210 2.933445 1.931523 3.935368
## 211 2.933445 1.931523 3.935368
## 212 2.944981 1.942943 3.947019
## 213 2.944981 1.942943 3.947019
## 214 2.956516 1.954338 3.958695
## 215 2.956516 1.954338 3.958695
## 216 2.956516 1.954338 3.958695
## 217 2.968052 1.965708 3.970396
## 218 2.979587 1.977053 3.982122
## 219 2.991123 1.988373 3.993873
## 220 2.991123 1.988373 3.993873
## 221 2.991123 1.988373 3.993873
## 222 3.002658 1.999668 4.005648
## 223 3.002658 1.999668 4.005648
## 224 3.002658 1.999668 4.005648
## 225 3.002658 1.999668 4.005648
## 226 3.002658 1.999668 4.005648
## 227 3.014194 2.010938 4.017449
## 228 3.014194 2.010938 4.017449
## 229 3.025729 2.022184 4.029275
## 230 3.025729 2.022184 4.029275
## 231 3.037265 2.033404 4.041126
## 232 3.037265 2.033404 4.041126
## 233 3.060336 2.055770 4.064901
## 234 3.083407 2.078037 4.088777
## 235 3.083407 2.078037 4.088777
## 236 3.094942 2.089133 4.100751
## 237 3.106478 2.100204 4.112751
## 238 3.118013 2.111251 4.124775
## 239 3.118013 2.111251 4.124775
## 240 3.118013 2.111251 4.124775
## 241 3.129549 2.122273 4.136824
## 242 3.141084 2.133271 4.148897
## 243 3.141084 2.133271 4.148897
## 244 3.152619 2.144244 4.160995
## 245 3.164155 2.155192 4.173118
## 246 3.175690 2.166116 4.185265
## 247 3.198761 2.187891 4.209632
## 248 3.198761 2.187891 4.209632
## 249 3.198761 2.187891 4.209632
## 250 3.221832 2.209568 4.234097
p + geom_segment(aes(y = 0, yend = 2.633523, x = 50, xend = 50),
colour = "gray") + geom_segment(aes(y = 2.633523, yend = 2.633523,
x = 25, xend = 50), colour = "gray") + scale_x_continuous(limits = c(25, 75), expand = c(0, 0), labels = scales::number) + scale_y_continuous(limits = c(0, 50), expand = c(0, 0), labels = scales::number)
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).
par(mfrow = c(1, 2))
plot(data.lm, which = 1:2)
autoplot(data.lm, which = 1:2) + theme_bw()
tlm = lm(pct_bf ~ log(age), data)
summary(tlm)
##
## Call:
## lm(formula = pct_bf ~ log(age), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.4197 -6.2216 0.2241 5.2902 27.0649
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12.182 6.469 -1.883 0.0609 .
## log(age) 8.296 1.714 4.840 2.29e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.944 on 248 degrees of freedom
## Multiple R-squared: 0.08629, Adjusted R-squared: 0.08261
## F-statistic: 23.42 on 1 and 248 DF, p-value: 2.288e-06
autoplot(tlm, which = 1:2) + theme_bw()
#install.packages("tidyr")
#install.packages("sjPlot")
ttlm = lm(log1p(pct_bf) ~ log(age), data)
sjPlot::tab_model(data.lm, tlm, ttlm, digits = 5, show.ci = FALSE)
| log 1 p(pct bf) | pct bf | log 1 p(pct bf) | ||||
|---|---|---|---|---|---|---|
| Predictors | Estimates | p | Estimates | p | Estimates | p |
| (Intercept) | 2.36821 | <0.001 | -12.18239 | 0.061 | 1.02224 | 0.014 |
| age | 0.01154 | <0.001 | ||||
| log(age) | 8.29576 | <0.001 | 0.49532 | <0.001 | ||
| Observations | 250 | 250 | 250 | |||
| R2 / R2 adjusted | 0.077 / 0.073 | 0.086 / 0.083 | 0.076 / 0.072 | |||
provide context to the qn!!! why use linear regression on full model…
\[ Age = \beta_0 + \beta_1density + \beta_1pctBodyFat + \beta_2weight + \beta_3height + \beta_4neck + \beta_5chest + \\ \beta_6abdomen + \beta_7waist + \beta_8hip + \beta_9thigh + \beta_{10}knee + \beta_{11}ankle + \beta_{12}bicep + \\ \beta_{13}forearm + \beta_{14}wrist + \epsilon \]
The residuals \(\epsilon_i\) are iid \(N(0,\sigma^2)\) and there is a linear relationship between y and x.
age_lm = lm(age ~ ., data)
summary(age_lm)
##
## Call:
## lm(formula = age ~ ., data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.1205 -5.1938 0.1434 5.8938 22.0763
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -43.24027 245.61885 -0.176 0.860410
## density 121.26738 187.99650 0.645 0.519526
## pct_bf 0.57880 0.43458 1.332 0.184205
## weight -0.03828 0.31775 -0.120 0.904209
## height -2.13847 1.60109 -1.336 0.182966
## neck 0.63745 0.47088 1.354 0.177120
## chest 0.28603 0.21978 1.301 0.194395
## abdomen 0.99400 0.20995 4.735 3.80e-06 ***
## waist NA NA NA NA
## hip -0.27058 0.30326 -0.892 0.373190
## thigh -1.54957 0.27784 -5.577 6.72e-08 ***
## knee 1.79738 0.48283 3.723 0.000247 ***
## ankle -0.66679 0.44212 -1.508 0.132863
## bicep 0.32423 0.34275 0.946 0.345148
## forearm -0.77606 0.41415 -1.874 0.062193 .
## wrist 6.18153 1.02127 6.053 5.61e-09 ***
## bmi -2.94254 2.25054 -1.307 0.192331
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.514 on 234 degrees of freedom
## Multiple R-squared: 0.5744, Adjusted R-squared: 0.5472
## F-statistic: 21.06 on 15 and 234 DF, p-value: < 2.2e-16
autoplot(age_lm, which = 1:2) + theme_bw()
age_step = step(age_lm, direction = "backward")
## Start: AIC=1086.32
## age ~ density + pct_bf + weight + height + neck + chest + abdomen +
## waist + hip + thigh + knee + ankle + bicep + forearm + wrist +
## bmi
##
##
## Step: AIC=1086.32
## age ~ density + pct_bf + weight + height + neck + chest + abdomen +
## hip + thigh + knee + ankle + bicep + forearm + wrist + bmi
##
## Df Sum of Sq RSS AIC
## - weight 1 1.05 16963 1084.3
## - density 1 30.16 16992 1084.8
## - hip 1 57.71 17020 1085.2
## - bicep 1 64.87 17027 1085.3
## - chest 1 122.77 17085 1086.1
## - bmi 1 123.92 17086 1086.1
## - pct_bf 1 128.58 17091 1086.2
## - height 1 129.31 17092 1086.2
## - neck 1 132.85 17095 1086.3
## <none> 16962 1086.3
## - ankle 1 164.88 17127 1086.7
## - forearm 1 254.54 17217 1088.0
## - knee 1 1004.53 17967 1098.7
## - abdomen 1 1624.90 18587 1107.2
## - thigh 1 2254.76 19217 1115.5
## - wrist 1 2655.73 19618 1120.7
##
## Step: AIC=1084.34
## age ~ density + pct_bf + height + neck + chest + abdomen + hip +
## thigh + knee + ankle + bicep + forearm + wrist + bmi
##
## Df Sum of Sq RSS AIC
## - density 1 29.21 16993 1082.8
## - hip 1 56.90 17020 1083.2
## - bicep 1 63.82 17027 1083.3
## - chest 1 121.76 17085 1084.1
## - pct_bf 1 127.54 17091 1084.2
## - neck 1 131.80 17095 1084.3
## <none> 16963 1084.3
## - ankle 1 165.02 17128 1084.8
## - forearm 1 253.82 17217 1086.0
## - bmi 1 846.07 17810 1094.5
## - knee 1 1030.02 17994 1097.1
## - abdomen 1 1629.51 18593 1105.3
## - height 1 1677.56 18641 1105.9
## - thigh 1 2322.94 19286 1114.4
## - wrist 1 2658.31 19622 1118.7
##
## Step: AIC=1082.77
## age ~ pct_bf + height + neck + chest + abdomen + hip + thigh +
## knee + ankle + bicep + forearm + wrist + bmi
##
## Df Sum of Sq RSS AIC
## - hip 1 50.94 17044 1081.5
## - bicep 1 57.58 17050 1081.6
## - chest 1 134.22 17127 1082.7
## - neck 1 136.37 17129 1082.8
## <none> 16993 1082.8
## - ankle 1 180.17 17173 1083.4
## - forearm 1 255.59 17248 1084.5
## - pct_bf 1 425.59 17418 1087.0
## - bmi 1 848.35 17841 1093.0
## - knee 1 1032.06 18025 1095.5
## - abdomen 1 1601.63 18594 1103.3
## - height 1 1667.12 18660 1104.2
## - thigh 1 2366.73 19359 1113.4
## - wrist 1 2730.53 19723 1118.0
##
## Step: AIC=1081.52
## age ~ pct_bf + height + neck + chest + abdomen + thigh + knee +
## ankle + bicep + forearm + wrist + bmi
##
## Df Sum of Sq RSS AIC
## - bicep 1 67.40 17111 1080.5
## <none> 17044 1081.5
## - ankle 1 167.14 17211 1082.0
## - chest 1 175.85 17219 1082.1
## - neck 1 182.79 17226 1082.2
## - forearm 1 243.33 17287 1083.1
## - pct_bf 1 462.70 17506 1086.2
## - knee 1 1001.01 18045 1093.8
## - bmi 1 1417.80 18461 1099.5
## - abdomen 1 1550.95 18594 1101.3
## - wrist 1 2753.58 19797 1117.0
## - height 1 2764.06 19808 1117.1
## - thigh 1 3012.53 20056 1120.2
##
## Step: AIC=1080.5
## age ~ pct_bf + height + neck + chest + abdomen + thigh + knee +
## ankle + forearm + wrist + bmi
##
## Df Sum of Sq RSS AIC
## <none> 17111 1080.5
## - ankle 1 178.13 17289 1081.1
## - forearm 1 192.84 17304 1081.3
## - chest 1 194.85 17306 1081.3
## - neck 1 209.14 17320 1081.5
## - pct_bf 1 493.44 17604 1085.6
## - knee 1 988.36 18099 1092.5
## - bmi 1 1359.87 18471 1097.6
## - abdomen 1 1487.35 18598 1099.3
## - height 1 2700.19 19811 1115.1
## - wrist 1 2841.19 19952 1116.9
## - thigh 1 2964.74 20076 1118.5
Backwards selection using the AIC dropped variables waist, height, density, bicep and hip but decided to keep chest in the model.
\[ Age = -74.36348 + 0.3153 \times pctBodyFat -0.4782 \times weight + 0.8185 \times neck\\ + 0.32544 \times chest + 0.8824 \times abdomen -1.6054 \times thigh + 1.8424 \times knee\\ - 0.7486 \times ankle -0.6929 \times forearm + 6.2789 \times wrist \] Looking at the \(R^2\) value (multiple R-squared) from the summary output, 50% of the variability of age is explained by the regression on percentage of body fat, weight, neck, chest, abdomen, thigh, knee, ankle, forearm and wrist.
summary(age_step)
##
## Call:
## lm(formula = age ~ pct_bf + height + neck + chest + abdomen +
## thigh + knee + ankle + forearm + wrist + bmi, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.0458 -5.2040 0.2848 5.6562 21.6013
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 98.0750 16.1779 6.062 5.22e-09 ***
## pct_bf 0.3330 0.1271 2.620 0.00936 **
## height -2.4808 0.4048 -6.128 3.65e-09 ***
## neck 0.7733 0.4534 1.706 0.08939 .
## chest 0.3496 0.2123 1.646 0.10103
## abdomen 0.9131 0.2008 4.548 8.62e-06 ***
## thigh -1.5923 0.2480 -6.422 7.25e-10 ***
## knee 1.7430 0.4701 3.708 0.00026 ***
## ankle -0.6872 0.4366 -1.574 0.11680
## forearm -0.6485 0.3960 -1.638 0.10279
## wrist 6.3441 1.0092 6.286 1.54e-09 ***
## bmi -3.4753 0.7991 -4.349 2.03e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.479 on 238 degrees of freedom
## Multiple R-squared: 0.5707, Adjusted R-squared: 0.5509
## F-statistic: 28.77 on 11 and 238 DF, p-value: < 2.2e-16
autoplot(age_step, which = 1:2) + theme_bw()